# -*- coding: utf-8 -*-
import scrapy
from scrapy import Request
from scrapy.exceptions import IgnoreRequest
from zc_core.util.http_util import retry_request
from zc_core.model.items import Box
from zc_core.util.batch_gen import time_to_batch_no
from norincogroup.rules import *
import time
from zc_core.spiders.base import BaseSpider


class SkuSpider(BaseSpider):
    """Spider that requests the catalog index and yields it as a single Box.

    ``start_requests`` issues one request to ``index_url``; the response is
    handed to ``parse_catalog`` and the result is emitted in a ``Box`` tagged
    ``'catalog'`` together with the current batch number.
    """
    name = 'catalog'
    # Catalog index endpoint (JSONP). The millisecond timestamp appended to
    # the callback name is evaluated once, when the class body is executed.
    index_url = 'https://ar.norincogroup-ebuy.com/getWzpzInfo.do?jsoncallback=jQuery99908154560900100469_{}'.format(
        int(time.time() * 1000))
    # Product listing endpoint; the single ``{}`` placeholder is the page number.
    sku_list_url = 'https://ar.norincogroup-ebuy.com/allthing/index.do?minprice=&maxprice=&cd=&dlcode=&pzname=&title=&order=&orderName=&ecPzType=0&mbname=&payType=&hydm=&searchType=1&skuIdWestern=&searchMode=1&pageNumber={}&pageSize=30&sortColumns=undefined'

    def __init__(self, batchNo=None, *args, **kwargs):
        """Initialise the spider, forwarding all arguments to ``BaseSpider``.

        Args:
            batchNo: Batch identifier passed through to the base class.
            *args: Extra positional arguments for the base class.
            **kwargs: Extra keyword arguments for the base class.
        """
        super(SkuSpider, self).__init__(*args, batchNo=batchNo, **kwargs)
        # NOTE(review): sku_list_url hard-codes pageSize=30 while this is 20,
        # and nothing in this class reads page_size - confirm which is intended.
        self.page_size = 20

    def _build_list_req(self, callback, page, mark=True):
        """Build an unfiltered request for one page of the product listing.

        Args:
            callback: Callable invoked with the response.
            page: Page number substituted into ``sku_list_url``.
            mark: Flag stored in the request meta under ``'mark'``.

        Returns:
            A ``Request`` whose meta carries ``batchNo``, ``page`` and ``mark``.
        """
        request_meta = {
            'batchNo': self.batch_no,
            'page': page,
            'mark': mark,
        }
        return Request(
            url=self.sku_list_url.format(page),
            callback=callback,
            errback=self.error_back,
            meta=request_meta,
            dont_filter=True,
        )

    def start_requests(self):
        """Yield the single request that fetches the catalog index."""
        # Categories and brands.
        yield Request(
            url=self.index_url,
            meta={
                'batchNo': self.batch_no,
            },
            callback=self.parse_sku_content_deal,
            errback=self.error_back,
            dont_filter=True,
            headers={
                'X-Requested-With': 'XMLHttpRequest',
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.72 Safari/537.36 Edg/90.0.818.42'
            }
        )

    def parse_sku_content_deal(self, response):
        """Parse the catalog index response and yield the result in a Box.

        Args:
            response: Response to the ``index_url`` request.

        Yields:
            A ``Box`` tagged ``'catalog'`` holding the batch number and the
            value returned by ``parse_catalog(response)``.
        """
        cats = parse_catalog(response)
        # Pass the count as a logger argument so the message is only
        # formatted when the INFO level is actually enabled.
        self.logger.info('品类: count[%s]', len(cats))
        yield Box('catalog', self.batch_no, cats)
